In [3]:
# Install the notebook's dependencies from inside the notebook itself.
# IPython interpolates {sys.prefix} and {sys.executable} into the shell commands,
# so conda and pip are pointed at the interpreter running this kernel.
import sys
# scikit-learn is requested from the `anaconda` channel; no version is pinned here.
!conda install --yes --prefix {sys.prefix} -c anaconda scikit-learn
# pandas-profiling is pinned to 2.3.0.
!{sys.executable} -m pip install pandas-profiling==2.3.0
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting pandas-profiling==2.3.0
  Downloading https://files.pythonhosted.org/packages/2c/2f/aae19e2173c10a9bb7fee5f5cad35dbe53a393960fc91abc477dcc4661e8/pandas-profiling-2.3.0.tar.gz (127kB)
Requirement already satisfied: pandas>=0.19 in d:\anaconda3\lib\site-packages (from pandas-profiling==2.3.0) (0.24.2)
Requirement already satisfied: matplotlib>=1.4 in d:\anaconda3\lib\site-packages (from pandas-profiling==2.3.0) (3.1.0)
Requirement already satisfied: jinja2>=2.8 in d:\anaconda3\lib\site-packages (from pandas-profiling==2.3.0) (2.10.1)
Collecting missingno>=0.4.2 (from pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/2b/de/6e4dd6d720c49939544352155dc06a08c9f7e4271aa631a559dfbeaaf9d4/missingno-0.4.2-py3-none-any.whl
Collecting htmlmin>=0.1.12 (from pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/b3/e7/fcd59e12169de19f0131ff2812077f964c6b960e7c09804d30a7bf2ab461/htmlmin-0.1.12.tar.gz
Collecting phik>=0.9.8 (from pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/45/ad/24a16fa4ba612fb96a3c4bb115a5b9741483f53b66d3d3afd987f20fa227/phik-0.9.8-py3-none-any.whl (606kB)
Collecting confuse>=1.0.0 (from pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/4c/6f/90e860cba937c174d8b3775729ccc6377eb91f52ad4eeb008e7252a3646d/confuse-1.0.0.tar.gz
Requirement already satisfied: astropy in d:\anaconda3\lib\site-packages (from pandas-profiling==2.3.0) (3.2.1)
Requirement already satisfied: python-dateutil>=2.5.0 in c:\users\ricky\appdata\roaming\python\python37\site-packages (from pandas>=0.19->pandas-profiling==2.3.0) (2.7.5)
Requirement already satisfied: numpy>=1.12.0 in d:\anaconda3\lib\site-packages (from pandas>=0.19->pandas-profiling==2.3.0) (1.16.4)
Requirement already satisfied: pytz>=2011k in d:\anaconda3\lib\site-packages (from pandas>=0.19->pandas-profiling==2.3.0) (2019.1)
Requirement already satisfied: cycler>=0.10 in d:\anaconda3\lib\site-packages (from matplotlib>=1.4->pandas-profiling==2.3.0) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in d:\anaconda3\lib\site-packages (from matplotlib>=1.4->pandas-profiling==2.3.0) (1.1.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in d:\anaconda3\lib\site-packages (from matplotlib>=1.4->pandas-profiling==2.3.0) (2.4.0)
Requirement already satisfied: MarkupSafe>=0.23 in d:\anaconda3\lib\site-packages (from jinja2>=2.8->pandas-profiling==2.3.0) (1.1.1)
Requirement already satisfied: seaborn in d:\anaconda3\lib\site-packages (from missingno>=0.4.2->pandas-profiling==2.3.0) (0.9.0)
Requirement already satisfied: scipy in d:\anaconda3\lib\site-packages (from missingno>=0.4.2->pandas-profiling==2.3.0) (1.2.1)
Requirement already satisfied: nbconvert>=5.3.1 in d:\anaconda3\lib\site-packages (from phik>=0.9.8->pandas-profiling==2.3.0) (5.5.0)
Requirement already satisfied: numba>=0.38.1 in d:\anaconda3\lib\site-packages (from phik>=0.9.8->pandas-profiling==2.3.0) (0.44.1)
Requirement already satisfied: pytest>=4.0.2 in d:\anaconda3\lib\site-packages (from phik>=0.9.8->pandas-profiling==2.3.0) (5.0.1)
Collecting pytest-pylint>=0.13.0 (from phik>=0.9.8->pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/64/dc/6f35f114844fb12e38d60c4f3d2441a55baff7043ad4e013777dff55746c/pytest_pylint-0.14.1-py3-none-any.whl
Requirement already satisfied: jupyter-client>=5.2.3 in d:\anaconda3\lib\site-packages (from phik>=0.9.8->pandas-profiling==2.3.0) (5.3.1)
Requirement already satisfied: pyyaml in c:\users\ricky\appdata\roaming\python\python37\site-packages (from confuse>=1.0.0->pandas-profiling==2.3.0) (3.13)
Requirement already satisfied: six>=1.5 in c:\users\ricky\appdata\roaming\python\python37\site-packages (from python-dateutil>=2.5.0->pandas>=0.19->pandas-profiling==2.3.0) (1.11.0)
Requirement already satisfied: setuptools in d:\anaconda3\lib\site-packages (from kiwisolver>=1.0.1->matplotlib>=1.4->pandas-profiling==2.3.0) (41.0.1)
Requirement already satisfied: mistune>=0.8.1 in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.8.4)
Requirement already satisfied: pygments in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (2.4.2)
Requirement already satisfied: pandocfilters>=1.4.1 in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (1.4.2)
Requirement already satisfied: defusedxml in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.6.0)
Requirement already satisfied: jupyter-core in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (4.5.0)
Requirement already satisfied: entrypoints>=0.2.2 in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.3)
Requirement already satisfied: nbformat>=4.4 in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (4.4.0)
Requirement already satisfied: traitlets>=4.2 in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (4.3.2)
Requirement already satisfied: bleach in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (3.1.0)
Requirement already satisfied: testpath in d:\anaconda3\lib\site-packages (from nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.4.2)
Requirement already satisfied: llvmlite>=0.29.0 in d:\anaconda3\lib\site-packages (from numba>=0.38.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.29.0)
Requirement already satisfied: py>=1.5.0 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (1.8.0)
Requirement already satisfied: packaging in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (19.0)
Requirement already satisfied: attrs>=17.4.0 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (19.1.0)
Requirement already satisfied: more-itertools>=4.0.0 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (7.0.0)
Requirement already satisfied: atomicwrites>=1.0 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (1.3.0)
Requirement already satisfied: pluggy<1.0,>=0.12 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (0.12.0)
Requirement already satisfied: importlib-metadata>=0.12 in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (0.17)
Requirement already satisfied: wcwidth in d:\anaconda3\lib\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (0.1.7)
Requirement already satisfied: colorama in c:\users\ricky\appdata\roaming\python\python37\site-packages (from pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (0.3.9)
Requirement already satisfied: pylint>=1.4.5 in d:\anaconda3\lib\site-packages (from pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (2.3.1)
Requirement already satisfied: pyzmq>=13 in d:\anaconda3\lib\site-packages (from jupyter-client>=5.2.3->phik>=0.9.8->pandas-profiling==2.3.0) (18.0.0)
Requirement already satisfied: tornado>=4.1 in d:\anaconda3\lib\site-packages (from jupyter-client>=5.2.3->phik>=0.9.8->pandas-profiling==2.3.0) (6.0.3)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in d:\anaconda3\lib\site-packages (from nbformat>=4.4->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (3.0.1)
Requirement already satisfied: ipython-genutils in d:\anaconda3\lib\site-packages (from nbformat>=4.4->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.2.0)
Requirement already satisfied: decorator in d:\anaconda3\lib\site-packages (from traitlets>=4.2->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (4.4.0)
Requirement already satisfied: webencodings in d:\anaconda3\lib\site-packages (from bleach->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.5.1)
Requirement already satisfied: zipp>=0.5 in d:\anaconda3\lib\site-packages (from importlib-metadata>=0.12->pytest>=4.0.2->phik>=0.9.8->pandas-profiling==2.3.0) (0.5.1)
Requirement already satisfied: astroid<3,>=2.2.0 in d:\anaconda3\lib\site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (2.2.5)
Requirement already satisfied: isort<5,>=4.2.5 in d:\anaconda3\lib\site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (4.3.21)
Requirement already satisfied: mccabe<0.7,>=0.6 in d:\anaconda3\lib\site-packages (from pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (0.6.1)
Requirement already satisfied: pyrsistent>=0.14.0 in d:\anaconda3\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert>=5.3.1->phik>=0.9.8->pandas-profiling==2.3.0) (0.14.11)
Requirement already satisfied: wrapt in d:\anaconda3\lib\site-packages (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (1.11.2)
Collecting typed-ast>=1.3.0; implementation_name == "cpython" (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0)
  Downloading https://files.pythonhosted.org/packages/47/a1/7a24868c15d84ed7446106d6c3d73807f58232a695452c0a29679e5a1523/typed_ast-1.4.0-cp37-cp37m-win_amd64.whl (155kB)
Requirement already satisfied: lazy-object-proxy in d:\anaconda3\lib\site-packages (from astroid<3,>=2.2.0->pylint>=1.4.5->pytest-pylint>=0.13.0->phik>=0.9.8->pandas-profiling==2.3.0) (1.4.1)
Building wheels for collected packages: pandas-profiling, htmlmin, confuse
  Building wheel for pandas-profiling (setup.py): started
  Building wheel for pandas-profiling (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Ricky\AppData\Local\pip\Cache\wheels\ce\c7\f1\dbfef4848ebb048cb1d4a22d1ed0c62d8ff2523747235e19fe
  Building wheel for htmlmin (setup.py): started
  Building wheel for htmlmin (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Ricky\AppData\Local\pip\Cache\wheels\43\07\ac\7c5a9d708d65247ac1f94066cf1db075540b85716c30255459
  Building wheel for confuse (setup.py): started
  Building wheel for confuse (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Ricky\AppData\Local\pip\Cache\wheels\b0\b2\96\2074eee7dbf7b7df69d004c9b6ac4e32dad04fb7666cf943bd
Successfully built pandas-profiling htmlmin confuse
Installing collected packages: missingno, htmlmin, pytest-pylint, phik, confuse, pandas-profiling, typed-ast
  Found existing installation: pandas-profiling 1.4.1
    Uninstalling pandas-profiling-1.4.1:
      Successfully uninstalled pandas-profiling-1.4.1
Successfully installed confuse-1.0.0 htmlmin-0.1.12 missingno-0.4.2 pandas-profiling-2.3.0 phik-0.9.8 pytest-pylint-0.14.1 typed-ast-1.4.0
In [80]:
import pandas as pd
import pandas_profiling
import numpy as np
from sklearn import preprocessing

# Paths of the per-rating CSV inputs (one, two and three stars), given relative
# to the notebook's working directory.
ms1, ms2, ms3 = (
    "one-star-michelin-restaurants.csv",
    "two-stars-michelin-restaurants.csv",
    "three-stars-michelin-restaurants.csv",
)

def normalizeRestaurantCsv(frame):
    """Return a copy of ``frame`` with missing ``price`` values imputed.

    Missing entries of the ``price`` column are replaced with the column's most
    frequent non-null value, converted to ``str``. When several values tie for
    most frequent, the first one reported by ``Series.mode`` is used.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing a ``price`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame. The caller's frame is never modified, so the cell that
        calls this function can be re-run without changing its own input.

    Raises
    ------
    KeyError
        If ``frame`` has no ``price`` column.
    """
    # Work on a copy so the input is not mutated behind the caller's back.
    normalized = frame.copy()

    priceModes = normalized["price"].mode(dropna=True)
    # A column with no non-null values has no mode; there is nothing to impute
    # with, so hand the (copied) frame back unchanged instead of raising.
    if priceModes.empty:
        return normalized

    priceMode = str(priceModes.iloc[0])
    normalized["price"] = normalized["price"].fillna(priceMode)
    return normalized
In [81]:
# One-star restaurants: load the CSV, normalize it, then render the profile
# report as the cell's output.
print("1 Michelin Star")
ms1Frame = normalizeRestaurantCsv(pd.read_csv(ms1))
ms1Frame.profile_report()
1 Michelin Star
Out[81]:

In [82]:
# Two-star restaurants: load the CSV, normalize it, then render the profile
# report as the cell's output.
print("2 Michelin Stars")
ms2Frame = normalizeRestaurantCsv(pd.read_csv(ms2))
ms2Frame.profile_report()
2 Michelin Stars
Out[82]:

In [83]:
# Three-star restaurants: load the CSV, normalize it, then render the profile
# report as the cell's output.
print("3 Michelin Stars")
ms3Frame = normalizeRestaurantCsv(pd.read_csv(ms3))
ms3Frame.profile_report()
3 Michelin Stars
Out[83]:

In [ ]: